######################################################################
#Replication file for "The Impact of China's AIIB on the World Bank"
#Authors: Jing Qian, James Vreeland, Jianzhi Zhao
#Codes to replicate Table E.1
######################################################################
rm(list = ls())

#To install the bpCausal package (ver 0.0.1)
# install.packages('devtools', repos = 'http://cran.us.r-project.org') # if not already installed
# devtools::install_github('liulch/bpCausal')

#Load bpCausal package
library(bpCausal)

#Other packages used
library(tidyverse)
library(RColorBrewer)

#Setwd
setwd("C:/Users/qianj/Dropbox (Princeton)/AIIB_WB_Replication")

#Load custom functions
#Note: Given the number of tests performed in the appendix, 
#several custom functions are created to implement estimation methods in a more efficient way.
#These functions merely create a wrap-up of existing functions, like bpCausal and gsynth,
#in order to avoid typing parameters repeatedly.

source("code/custom_functions.R")

###################################
#Replicate Table E.1
###################################
#--------------
#(1) Load data
#--------------
load("data/data_main.RData")

#--------------
#(2) Setup
#--------------
D.use = "aiib_founder_2016"
covs.all = c("gdppc_log_lag",
             "population_log_lag",
             "election_either_lag",
             "fdi_gdp_lag",
             "debt_gni_lag",
             "oda_gni_lag",
             "polity2_lag",
             "unsc_lag",
             "IdealPointDistance_lag")
index.use = c("iso2c", "year")
df.use = df.main

#--------------
#(3) Estimation: Table E.1, column (1) 
#~1.5 minutes
#--------------
dv.list = c("hard_amount_project_all_2_log")

#No covariate
X.use = Z.use = A.use = c()

#Estimate
tab.e1.1 = bpcausal.group(dv.list = dv.list,
                          df.use = df.use,
                          D.use = D.use,
                          X.use = X.use,
                          Z.use = Z.use,
                          A.use = A.use)

#Get estimated coefficients and intervals
att.tab.e1.1 = effSummary(tab.e1.1$hard_amount_project_all_2_log)
att.tab.e1.1$est.avg #Estimated ATT as reported in Table E.1, Column (1)
att.tab.e1.1$est.avg %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table E.1

#Number of observations
df.use.complete = df.main[, c(dv.list, D.use, index.use, "aiib_founder")] %>%
  .[complete.cases(.), ]
nrow(df.use.complete) #Number of observations

#Treated and Control Units
iso2c.treated = unique(df.use.complete$iso2c[df.use.complete$aiib_founder == 1])
iso2c.all = unique(df.use.complete$iso2c)

length(iso2c.treated) #Treated Units
length(iso2c.all) - length(iso2c.treated) #Control Units

#--------------
#(4) Estimation: Table E.1, column (2) 
#~3 minutes
#--------------
dv.list = c("hard_amount_project_all_2_log")

#No covariate
X.use = Z.use = A.use = covs.all

#Estimate
tab.e1.2 = bpcausal.group(dv.list = dv.list,
                          df.use = df.use,
                          D.use = D.use,
                          X.use = X.use,
                          Z.use = Z.use,
                          A.use = A.use)

#Get estimated coefficients and intervals
att.tab.e1.2 = effSummary(tab.e1.2$hard_amount_project_all_2_log)
att.tab.e1.2$est.avg #Estimated ATT as reported in Table E.1, Column (2)
att.tab.e1.2$est.avg %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table E.1

coef.tab.e1.2 = coefSummary(tab.e1.2$hard_amount_project_all_2_log)
coef.tab.e1.2$est.beta[-1,] #Estimated invariant component of covariate coefficients (ignore the intercept in first row)
coef.tab.e1.2$est.beta[-1,] %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table E.1

#Number of observations
df.use.complete = df.main[, c(dv.list, D.use, covs.all, index.use, "aiib_founder")] %>%
  .[complete.cases(.), ]
nrow(df.use.complete) #Number of observations

#Treated and Control Units
iso2c.treated = unique(df.use.complete$iso2c[df.use.complete$aiib_founder == 1])
iso2c.all = unique(df.use.complete$iso2c)

length(iso2c.treated) #Treated Units
length(iso2c.all) - length(iso2c.treated) #Control Units


#--------------
#(5) Estimation: Table E.1, column (3) 
#~1.5 minutes
#--------------
dv.list = c("hard_amount_project_all_log")

#No covariate
X.use = Z.use = A.use = c()

#Estimate
tab.e1.3 = bpcausal.group(dv.list = dv.list,
                          df.use = df.use,
                          D.use = D.use,
                          X.use = X.use,
                          Z.use = Z.use,
                          A.use = A.use)

#Get estimated coefficients and intervals
att.tab.e1.3 = effSummary(tab.e1.3$hard_amount_project_all_log)
att.tab.e1.3$est.avg #Estimated ATT as reported in Table E.1, Column (3)
att.tab.e1.3$est.avg %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table E.1

#Number of observations
df.use.complete = df.main[, c(dv.list, D.use, index.use, "aiib_founder")] %>%
  .[complete.cases(.), ]
nrow(df.use.complete) #Number of observations

#Treated and Control Units
iso2c.treated = unique(df.use.complete$iso2c[df.use.complete$aiib_founder == 1])
iso2c.all = unique(df.use.complete$iso2c)

length(iso2c.treated) #Treated Units
length(iso2c.all) - length(iso2c.treated) #Control Units

#--------------
#(6) Estimation: Table E.1, column (4) 
#~3 minutes
#--------------
dv.list = c("hard_amount_project_all_log")

#No covariate
X.use = Z.use = A.use = covs.all

#Estimate
tab.e1.4 = bpcausal.group(dv.list = dv.list,
                          df.use = df.use,
                          D.use = D.use,
                          X.use = X.use,
                          Z.use = Z.use,
                          A.use = A.use)

#Get estimated coefficients and intervals
att.tab.e1.4 = effSummary(tab.e1.4$hard_amount_project_all_log)
att.tab.e1.4$est.avg #Estimated ATT as reported in Table E.1, Column (4)
att.tab.e1.4$est.avg %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table E.1

coef.tab.e1.4 = coefSummary(tab.e1.4$hard_amount_project_all_log)
coef.tab.e1.4$est.beta[-1,] #Estimated invariant component of covariate coefficients (ignore the intercept in first row)
coef.tab.e1.4$est.beta[-1,] %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table E.1

#Number of observations
df.use.complete = df.main[, c(dv.list, D.use, covs.all, index.use, "aiib_founder")] %>%
  .[complete.cases(.), ]
nrow(df.use.complete) #Number of observations

#Treated and Control Units
iso2c.treated = unique(df.use.complete$iso2c[df.use.complete$aiib_founder == 1])
iso2c.all = unique(df.use.complete$iso2c)

length(iso2c.treated) #Treated Units
length(iso2c.all) - length(iso2c.treated) #Control Units
